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Abstract 



This report contains the host and node programs for the 
solution of the shallow water equations with topography on an 
INTEL iPSC/2 hypercube. Finite difference scheme conserving 
potential enstrophy and energy is employed in each subdomain. 



1. Introduction 

In this report we supply software for the numerical solution 
of the shallow water equations on an INTEL iPSC/2 hypercube. The 
method is based on domain decomposition with overlap. Finite 
difference scheme is used to solve in each subdomain. The scheme 
conserves potential enstrophy and energy (Arakawa C grid [1].) 
See also Neta and Lustman [2]. The efficiency of the algorithm is 
81% when using 8 processors. 
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2 . Host program 

PROGRAM HOST 
c 

parameter (lparm0=10) 

c cube parameters 

parameter (nprocs=8 , node9=nprocs-l) 

parameter ( lbuf =10*nprocs+30+lparm0 , leninit=lbuf *4 ) 
c 4 bytes per float 

parameter ( inityp=9 14 ) 

parameter ( nodes=-l , idhost=2 , nodepid=3 ) 
c domain constants 

parameter (maxx=6000 , maxy=2000 , meshy=50 , meshx=50) 
parameter (j9=maxy/meshy, i9global=maxx/meshx-l) 
parameter (i9dim=4+ (l+i9global) /nprocs) 
c 

c notice that the domain size and mesh define the 
c dimensions of all the arrays 
c 

parameter ( myslv=l , ibev=2 , iav=3 , i0gv=4 ) 
parameter ( iminv=5 , imaxv=6 , i9v=7) 

common/dtcom/dt , trepo , ttot 
common/physcom/coriof , corioa , coriob , g 
common/uvhOcom/uO , vO , top , width , ho , parmO ( IparmO ) 



parameter (lenuvh=4* (i9dim+l) *3* ( j9+l) ) 
parameter (ku=l , kv=2 , kh=3 ) 

logical done(0:node9) , alldone 
dimension uvh(0: j9, 3, 0: i9dim) 
dimension buf(lbuf) 
dimension b00kp(10, 0:node9) 
equivalence (b00kp,buf) 



call getcube ( ' arakawa ' , ' ',1) 

call setpid ( idhost) 
nproc=numnodes ( ) 

print*, ' got the maximal cube, ' , nproc, ' nodes' 
call load ( 'node' , nodes, nodepid) 

print*,' domain parameters xmax,ymax,meshx,meshy=' 
, , maxx , maxy , meshx , meshy 

print*,' this generates a grid (0 : ' , i9global 
,,',0:',j9,')' 
do 13 i=0,node9 
13 done (i)=. false, 

call bokeph(buf) 
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l=10*nprocs+l 
call getdata 
buf (1) =dt 
1 = 1+1 

buf (1) =trepo 
1 = 1+1 

buf (l)=ttot 

nstep9=ttot/dt 

c the only parameter host must know is the maximal 

c number of steps, to decide whether the nodes 

c are done 

c 

1 = 1+1 

buf (1) =coriof 
1 = 1+1 

buf (l)=corioa 
1 = 1+1 

buf (1) =coriob 
1 = 1+1 
buf ( 1 ) =g 
1 = 1+1 
buf ( 1 ) =u0 
1=1 + 1 
buf (l)=vO 
1 = 1+1 

buf (1) =top 
1=1+1 

buf (1) =width 
1 = 1+1 
buf (l)=hO 
1 = 1+1 

do 976 j=l,lparmO 
buf (1 ) =parmO ( j ) 

1 = 1+1 

976 continue 

call csend ( inityp, buf , leninit, nodes, nodepid) 
c 

c now the host will wait for results 
c all come in uvh, but must be unscrambled using 
c the message type to be printed 
c 

n0=0 

open (UNIT=11 , FORM= ' FORMATTED ' , FILE= ' ARA ' ) 

1132 continue 

call crecv(-l,uvh, lenuvh) 
c any message at all 

ityp=inf otype ( ) 
n=ityp/100 
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c number of steps 

if(n.ge.nO) then 

write (UNIT=11 , FMT=7500) n*dt , n*dt/3600 , n*dt/24/3600 
7500 format(' Report after ' / fl0.0,' sec =',fl0.2,' hours =' 

, , f 10 . 2 , ' days ' ) 
n0=n+l 
c 

c to print the title just once, not for each processor 
c 

endif 

node=mod ( ityp, 100) 
imin=b00kp ( iminv, node) 
imax=b00kp ( imaxv, node) 
i0g=b00kp ( iOgv, node) 

c to convert i to global i 

do 76 i=imin,imax 
iglo=mod(i0g+i , i9global+l) 
do 76 j=0 , j 9 

write (UNIT=11 , FMT=7600) n, j+1 , iglo+1 
, , uvh( j ,ku, i) ,uvh ( j , kv, i) ,uvh(j ,kh,i) 
continue 

format (i7 , 2i4 , 3g20 . 5) 
done(node)= (n.ge.nstep9) 
continue 
alldone=done (0) 
do 23 i=l,node9 

alldone=alldone . and . done ( i) 
if (.not. all done) goto 1132 

more messages coming 



76 

7600 

452 



23 



c 
c 

c the test above replaces waitall, which cannot be used 
c 



call relcube( 'arakawa ' ) 

stop 

end 

subroutine bokeph (bOOkp) 
c 

c each node sees its data as an array (0:j9,0:i9) 
c 

c the column (,i) in the node data is the same as ( , i+iOglobal) in 
c the full matrix, which is (0: j9,0: i9global) 
c 

parameter (lparm0=10) 



c cube parameters 

parameter (nprocs=8 , node9=nprocs-l) 
c domain constants 

parameter (maxx=6 000, maxy=2 000 ,meshy=50,meshx=50) 
parameter (j 9=maxy/meshy, i9global=maxx/meshx-l) 
parameter ( i9dim=4+ ( l+i9global ) /nprocs) 
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c 

c notice that the domain size and mesh define the 
c dimensions of all the arrays 
c 

parameter ( myslv=l , ibev=2 , iav=3 , i0gv=4) 
parameter ( iminv=5 , imaxv=6 , i9v=7) 

dimension bOOkp ( 10 , 0 : node9) 
integer gray,ginv 
integer i0w(0:node9) 
integer i9w(0:node9) 



linnod=(i9global+l) /nprocs 
do 1 i=0,node9 
iOw(i) =i*l innod 
i9w(i) =i0w(i) +1 innod- 1 

1 continue 
iad=0 

15 continue 

if (i9w(node9) . ne . i9global) then 
i9w( iad) =i9w(iad) +1 
do 2 i=iad+l , node9 
i0w(i)= i0w(i)+l 
i9w(i)= i9w(i)+l 

2 continue 
iad=iad+l 
goto 15 
end if 

print* , iOw 
print* , i9w 
c 

c at this stage, iOw(slice) to i9w(slice) are the columns that 
c belong to slice, and will be advanced in time by 
c the processor that treats slice 
c 

c but it may need four additional columns, to compute 
c neighborhood averages 
c 

do 333 iam=0,node9 
mysl ice=ginv ( iam) 
i0wm=i0w(myslice) -2 
i0m=i0wm 

if (iOwm.lt.O) i0wm=i0wm+i9global+l 
iOw(myslice) =i0wm 
i9wm=i9w(myslice) +2 
i9m=i9wm 

if ( i9wm. gt . i9global ) i9wm=i9wm-i9global-l 

i9w(myslice) =i9wm 

i9=i9m-i0m 

ibefore=-l 

iafter=-l 
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myp=mod ( 3*nprocs+myslice+l , nprocs) 
mym=mod ( 3*nprocs+myslice-l , nprocs) 
iafter=gray (myp) 
ibefore=gray (mym) 

c== ================================== ============ 

c 

c also define imin, imax 

c these are the lines which this node should advance in time, 
c the other lines are for information only 
c 

imin=2 
imax=i9 -2 

c==== ========================== ================== 

bOOkp(iOgv, iam) =iOw(myslice) 
bOOkp (myslv, iam) =myslice 
bOOkp (ibev, iam)=ibefore 
bOOkp ( iav, iam) =iafter 
b00kp(i9v, iam)=i9 
bOOkp (imaxv, iam)=imax 
bOOkp ( iminv , iam) =imin 

print*,' I am 1 , iam 

print*,' my slice is ', myslice 

print*,' procs. before, after me= ' , ibefore, iafter 
print*,' my data have dim ( , 0 : ' ,i9,')' 

,,imin, ' to ',imax,' meaningful' 
iii=bOOkp ( iOgv, iam) 

print*,' my column 0 is global column ' , iii 
333 continue 

return 
end 

subroutine getdata 

parameter ( lparm0=10) 
c domain constants 

parameter ( maxx=6000 , maxy=2 000 , meshy=50 , meshx=50 ) 

common/dtcom/dt , trepo , ttot 
common/phy scom/coriof , corioa , coriob , g 
common/uvhOcom/uO , vO , top , width , ho , parmO ( lparmO) 
character*3 name 

minmsh=min ( meshx , meshy ) 
dt=150 . *minmsh/125 
ttot=24*3600*5 

5 days 

trepo= -ttot 

silly programming trick, so ttot, trepo 
may be entered in any order 

coriof=l.e-4 
corioa=0 



c 

c 

c 

c 

c 
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notice! length in km ! 



coriob=0 
g=9.8e-3 
c 

u0=20e-3 
v0=0 
top=2 

width=1000 

h0=5 

do 1 i=l,lparmO) 

1 parm0(i)=0 

c 

c all these are defaults, then more data may be read 
c 

1132 



100 



1131 



continue 
readlOO , name 
format (a3) 

if (name.eq. 'par ' ) then 
read* , i , x 

if ( i . gt . 0 . and. i . le . IparmO) 

parmO ( i) =x 
goto 1131 

else 

goto 1132 

endif 



then 



else 



goto 

goto 

then 



then 



if (name . eq. ' end ' ) 

i f ( name . eq . ' sto ' ) 

if (name.eq. 'dt ' ) 
read* , dt 
goto 1132 

endif 

if(name.eq. 'tre') 

read* , trepo 
goto 1132 

endif 

if (name.eq. ' tto ' ) then 
read* , ttot 
if (trepo. It . 0) 
goto 1132 

endif 

if (name . eq. ' cor ' ) then 
read* , coriof 
goto 1132 

endif 

if (name . eq. ' coa ' ) then 
read* , corioa 
goto 1132 

endif 

if (name.eq. 'cob' ) then 
read* , coriob 
goto 1132 

endif 



9999 

9999 



trepo= -ttot 
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9999 
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if (name . eq. ' g ') then 
read* , g 
goto 1132 

end if 

if (name.eq. 'uO ') then 
read*,uO 
goto 1132 

endif 

if (name.eq. 'vO ') then 
read*,vO 
goto 1132 

endif 

if (name.eq. 'hO ') then 
read*,hO 
goto 1132 

endif 

if (name.eq. 'top' ) then 
read* , top 
goto 1132 

endif 

if (name . eq. ' wid ' ) then 
read* , width 
goto 1132 

endif 

stop 

endif 

continue 
trepo=abs (trepo) 
print*, 'dt, treport, ttotal= ' 

, ,dt, trepo, ttot 

print*, 'coriolis f , corioa , coriob, g 
, , coriof, corioa , coriob, g 
print*, 'uO, vO, top, width, h0=' 

, ,uO,vO, top, width, hO 
do 2 i=l,lparmO 
print* ,parmO (i) 
return 
end 
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3. Node program 



c 



PROGRAM NODE 
parameter ( lparm0=10 ) 



c cube parameters 

parameter (nprocs=8 , node9=nprocs-l) 
parameter ( nodes=-l , idhost=2 , nodepid=3 ) 

c domain constants 

parameter (maxx=6 000 ,maxy=2 000 ,meshy=50 ,meshx=50) 
parameter ( j 9=maxy/meshy , i9global=maxx/meshx - 1) 
parameter ( i9dim=4+ ( l+i9global) /nprocs) 
c 

c notice that the domain size and mesh define the 
c dimensions of all the arrays 
c 



c 

c The array UVH contains the data u,v,h, in a format that allows 
c fast message passing. 

c At a fixed i, just send a buffer beginning at UVH(0,l,i) 
c with length 2 (columns) *3 (variables) * (j9+l) words 
c 

parameter (lenUVH =6*(j9+l)*4 ) 
c 4 bytes per real 

parameter ( inddt0=9140 , inddt9=9 149 ) 
parameter (ku=l,kv=2 , kh=3 ) 
common/allcom/ UVH (0 : j 9 , 3 , 0 : i9dim) 
zeta (0 : i9dim, 0 : j9 ) , hq ( 0 : i9dim, 0 : j 9 ) , q( 0 : i9dim , 0 : j 9 ) 
f (0 : i9dim, 0 : j9 ) , alf a (0 : i9dim, 0 : j 9 ) , beta ( 0 : i9dim , 0 : j 9 ) 
gama (0 : i9dim, 0 : j9) , delta (0 : i9dim, 0 : j9) 
eps ( 0 : i9dim, 0 : j9 ) , f i (0 : i9dim, 0 : j9 ) 

_,cay (0: i9dim, 0: j9) , ustar ( 0 : i9dim, 0 : j 9) ,vstar (0: i9dim, 0: j9) 
_,dudt (0: i9dim, 0: j9) , dvdt (0 : i9dim, 0 : j 9) , dhdt ( 0 : i9dim, 0 : j 9 ) 
hs (0 : i9dim, 0 : j9 ) , hu (0 : i9dim, 0 : j9 ) , hv ( 0 : i9dim, 0 : j 9 ) 

common/bkkpcom/mOdulO , iam,myslice, ibefore, iafter 
iOglobal , i9 , i8 , imin, imax 

common/dtcom/dt , trepo , ttot 

common/physcom/coriof , corioa , coriob , g 

common/UVHOcom/uO , vO , top , width , hO , parmO ( IparmO ) 

common/seqcom/lmnpr 
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c during debug only 
c 

dimension uold(0:i9dim / 0: j9) 
void (0: i9dim, 0: j9) , hold ( 0 : i9dim, 0: j9) 



c 

c during debug only 
c 

if (mynode() .ge.nprocs) stop 
iam=mynode ( ) 
lmnpr=1000 +100*iam 

m0dul0=i9global+l 

call init 

nreport=trepo/dt 

nstep9=ttot/dt 

dthalf=dt/2 

twodt=2*dt 

nstep=0 

call report (nstep, nreport, nstep9) 



c================================================ 

c sending data to the neighbors THE VERY FIRST TIME 

c the data get to different buffers, so the CALLS are synchronous 

c 

msgbefo=isend(inddt9,UVH(0, 1, imin) , lenUVH, ibefore , nodepid ) 
msgaftr=isend(inddtO,UVH(0, 1, imax-1) , lenUVH, iaf ter , nodepid ) 
c======— ========================================= 

10 continue 

c 

c Heun step 
c 

call defddt 



c 

c until now, UVH has not changed, before it changes, 
c ensure the sending worked 
c 

call msgwait (msgbefo) 
call msgwait (msgaftr) 

do 11 i=0 , i9 
do 11 j=0 , j 9 
uold( i, j ) =UVH ( j , ku, i) 

UVH ( j ,ku, i) =uold (i, j )+dudt (i, j ) *dthalf 
vold(i, j )=UVH( j ,kv, i) 

UVH ( j , kv, i) =vold ( i, j ) +dvdt (i, j ) *dthalf 
hold ( i , j ) =UVH ( j , kh , i ) 
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UVH ( j , kh, i) =hold (i , j ) +dhdt ( i, j ) *dthalf 

11 continue 

c =================================== ============= 

c 

c the step ends with sending data to the neighbors 

c the data get to different buffers, so the CALLs are synchronous 

c 

msgbefo=isend ( inddt9 ,UVH(0 , 1 , imin) , lenUVH, ibefore , nodepid ) 
msgaftr=isend ( inddtO ,UVH ( 0,1, imax-1) , lenUVH, iafter , nodepid ) 
call defddt 
c 

c until now, UVH has not changed, before it changes, 
c ensure the sending worked 
c 

call msgwait (msgbefo) 
call msgwait (msgaftr) 

c=================== ============ ================= 

do 12 i=0,i9 
do 12 j=0,j9 

UVH ( j ,ku, i) =uold ( i , j ) +dudt ( i , j ) *dt 
UVH ( j , kv , i ) =vold ( i , j ) +dvdt ( i , j ) *dt 
UVH ( j , kh , i ) =hold ( i , j ) +dhdt ( i , j ) *dt 

12 continue 

c================================================ 

c 

c the step ends with sending data to the neighbors 

c the data get to different buffers, so the CALLs are synchronous 

c 

msgbefo=isend(inddt9,UVH(0, 1, imin) , lenUVH, ibefore , nodepid ) 
msgaftr=isend(inddtO,UVH(0, 1, imax-1) , lenUVH, iafter , nodepid ) 
nstep=nstep+l 

call report (nstep, nreport, nstep9) 

125 continue 

c 

c following leapfrog steps 
c 

call defddt 
c 

c until now, UVH has not changed, before it changes, 
c ensure the sending worked 
c 

call msgwait (msgbefo) 
call msgwait (msgaftr) 

c================================================ 

do 13 i=0,i9 
do 13 j=0 , j 9 
temp=UVH ( j , ku , i ) 

UVH ( j , ku , i ) =uold ( i , j ) +dudt ( i , j ) *twodt 
uold (i , j ) =temp 
temp=UVH ( j , kv , i ) 

UVH ( j , kv , i ) =vold ( i , j ) +dvdt ( i , j ) *twodt 
void ( i , j ) =temp 
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temp=UVH ( j , kh , i ) 

UVH ( j , kh, i) =hold( i, j ) +dhdt (i , j ) *twodt 
hold ( i , j ) =temp 
13 continue 

c ========= ======================================= 

c 

c the step ends with sending data to the neighbors 

c the data get to different buffers, so the CALLs are synchronous 

c 

msgbefo=isend ( inddt9 ,UVH(0, 1, imin) , lenUVH, ibefore, nodepid ) 
msgaftr=isend(inddtO,UVH(0, 1, imax-1) , lenUVH, iafter, nodepid ) 
c========= ============================ =========== 

nstep=nstep+i 

call report (nstep, nreport, nstep9) 
if( mod (nstep+1 , 300) . eq. 1) then 
goto 10 
else 

goto 125 

endif 

end 

subroutine UVHpr (UVH, t , kUVH, leadim, i9 , j 9 ) 

parameter (ku=l, kv=2 , kh=3) 

character* ( *) t 

common/ seqcom/lmnpr 

dimension UVH (0 : j9 , 3 , 0 : leadim) 

common/bkkpcom/mOdulO , iam,myslice, ibefore, iafter 
_, iOglobal , i90 , i8 , imin, imax 
c 

c comment out the return during debug 
c 

return 

print2000 , 10000*lmnpr , t , leadim, i9 , j9 , imin, imax, iam 
2000 format(il0,2x,a20, ' leadim, i9 , j 9= ', 3i4 , ' imin, imax= ' , 3i4) 

do 1 i=0,i9 

ii=mod (i+iOglobal ,m0dul0) 

if ( i .ge . imin. and. i . le. imax) then 

iamy=iam 

else 

iamy=iam+10 

endif 

do 1 j=0,j9 

printlOOO, ii+100* ( j+100*lmnpr) , t, j , ii 
_ , UVH ( j , kUVH , i) , iamy 
1 continue 

1000 format (ilO, 2x, a 10, 2x, 2i4 , f20. 7, i3) 

lmnpr=lmnpr+l 
return 
end 
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subroutine bokepn (bOOkp) 
c 

c the node sees its data as an array (0: i9dim, 0: j9) 
c 

c the column (,i) in the node data is the same as ( , i+iOglobal) in 
c the full matrix, which is (0 : i9dimglobal , 0 : j9) 
c 

c most of the work was done by the host, and 
c data is just rearranged here 
c 

parameter (lparm0=10) 



c cube parameters 

parameter (nprocs=8 , node9=nprocs-l) 
c connection parameters 

parameter ( myslv=l, ibev=2 , iav=3 , i0gv=4) 
parameter ( iminv=5, imaxv=6, i9v=7) 



dimension b00kp(10, 0:node9) 
common/ seqcom/ lmnpr 

common/bkkpcom/mOdulO, iam,myslice, ibefore, iafter 
, iOglobal , i9 , i8 , imin, imax 



iam=mynode ( ) 

myslice=b00kp (myslv, iam) 
ibefore=b00kp ( ibev, iam) 
iaf ter=b00kp ( iav , iam) 
i0global=b00kp ( iOgv, iam) 
i9=b00kp ( i9v, iam) 
imin=b00kp ( iminv , iam) 
imax=b00kp ( imaxv , iam) 
i8=i9-l 



c print during debug only 
c 

cdebug key=lmnpr*10000+100*iam 

cdebug print*, key, ' ... I am ' , iam 

cdebug key=key+l 

cdebug print*, key,' slice=', myslice 

cdebug key=key+l 

cdebug print*, key, ' nodes bef , aft= ' , ibefore , iafter 

cdebug key=key+l 

cdebug print*, key,' data dim (0:' , i9,',)' 

cdebug key=key+l 

cdebug print*, key,' ',imin, ' to ',imax,' meaningful' 

cdebug key=key+l 

cdebug print*, key, ' my column 0=global ', iOglobal 

cdebug lmnpr=lmnpr+l 



return 

end 
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function coriofun ( i, j , meshx, meshy) 
parameter ( lparm0=10) 

common/bkkpcom/mOdulO, iam,myslice, ibefore, iafter 
_, iOglobal ,19,18, imin, imax 
common/d tcom/dt , trepo , ttot 
common/physcom/coriof , corioa , coriob , g 
common/UVHOcom/uO , vO , top , width , hO , parmO ( IparmO ) 

x=meshx*mod ( iOglobal+i , mOdulO) 
y=meshy* ( j ) 

c x,y for f — like zeta — from C-mesh 

coriofun=coriof 
c 

c for the simplest case, coriolis is constant, but in general 
c it might be computed using x,y and the parmO data 
c or corioa, coriob — tangent plane, if needed 
c 

return 

end 

subroutine defddt 
c 

c the results always in dudt, dvdt, dhdt in aLlcom 
c 

parameter (lparm0=10) 

c cube parameters 

parameter ( nprocs=8 , node9=nprocs-l) 
c domain constants 

parameter (maxx=6000 ,maxy=2000,meshy=50 ,meshx=50) 
parameter ( j 9=maxy/meshy , i9global=maxx/meshx-l) 
parameter ( i9dim=4+ ( l+i9global) /nprocs) 
c 

c notice that the domain size and mesh define the 
c dimensions of all the arrays 
c 



c 

c The array UVH contains the data u,v,h, in a format that allows 
c fast message passing. 

c At a fixed j, just send a buffer beginning at UVH(0,l,j) 
c with length 6*(i9+l) words = 2 rows of 3 vars each 
parameter (lenUVH =6*(j9+l)*4 ) 
c 4 bytes per real 

parameter (inddt 0=9 140 , inddt9=9149) 
c 

parameter (ku=l , kv=2 , kh=3 ) 
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common/allcom/ UVH (0: j 9 , 3 , 0 : i9dim) 

, zeta ( 0 : i9dim, 0 : j9) , hq(0 : i9dim, 0 : j 9 ) , q (0 : i9dim, 0 : j 9 ) 

, f ( 0: i9dim, 0 : j 9) ,alfa(0: i9dim, 0 : j 9) , beta ( 0 : i9dim, 0 : j 9 ) 

, gama (0: i9dim, 0 : j9) , delta ( 0 : i9dim, 0: j9) 
eps (0: i9dim, 0 : j 9) , f i (0 : i9dixn, 0 : j 9) 

, cay (0 : i9dim, 0 : j9) ,ustar(0: i9dim, 0: j 9 ) ,vstar (0: i9dim, 0: j9) 
, dudt (0 : i9dim, 0 : j9) , dvdt (0 : i9dim, 0 ; j 9 ) , dhdt ( 0 : i9dim, 0 : j 9) 

, hs(0: i9dim, 0: j 9 ) , hu(0: i9dim, 0: j 9 ) , hv(0: i9dim, 0 : j 9 ) 

common/bkkpcom/mOdulO, iam,myslice, ibefore, iafter 
, iOglobal, i9, i8, imin, imax 

common/dtcom/dt , trepo , ttot 

common/physcom/cor iof , cor ioa , cor iob , g 

coiranon/UVHOcom/uO , vO , top , width , ho , parmO ( lparmO ) 



i7=i8-l 
dx=meshx 
dy=meshy 
j 8= j 9-1 
j 7= j 9-2 

c================================================ 

c 

c any such step begins with receiving data from the neighbors 
c the data get to different buffers, so the CALLS are synchronous 
c 

in0=irecv(inddt0,UVH(0, 1,0), lenUVH ) 
in9=irecv ( inddt9 , UVH ( 0 , 1 , i8 ) , lenUVH ) 
call msgwait(inO) 
call msgwait(in9) 

CALL UVHpr(UVH, 'u after irecv ' , ku, i9dim, i9 , j 9 ) 

CALL UVHpr(UVH, 'v after irecv ' , kv , i9dim, i9 , j 9 ) 

CALL UVHpr(UVH, 'h after irecv ' , kh , i9dim, i9 , j 9) 

c 

c The big mess is that u,v,h are not defined everywhere! 
c u defined for 0<=i<=i9, 0<=j < j9 

c v defined for 0<=i < i9, 0<=j<=j9 

c h defined for 0<=i < i9, 0<=j < j9 

c 

c But because of periodicity in i , which is maintained by the 
c send+receive : 

c v defined for 0<=i<=i9 , 0<= j <=j 9 

c h defined for 0<=i<=i9, 0<=j < j9 

c 

c So, finally, only the following are MISSING: 
c u(,j9) , h(,j9) 
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c 

c NOW WE DEFINE ALL VARIABLES WHEREVER POSSIBLE 
C THEN, 

c SEE IF d/dt IS DEFINED WHERE IT SHOULD: 

c imin<=i<=imax, jmin<=j<=jmax 

c 

c imin,imax from bookkeeping, 

c jmin,jmax depend on the variable u,v,h 

c 

c IT TURNS OUT THAT EVERYTHING IS SAFE PROVIDED: 2<=imin , imax < i8 

c 

c N.B. imax less than i8 ! 

c 

c 

c 3.12 in the paper follows 

c 

c 

do 3120 i=l,i9 
do 3121 j =1 , j 8 
c 

c both i,j safe 
c 

zeta ( i, j ) = (UVH ( j -1, ku, i) - UVH( j ,ku, i) )/dy 
_+ (UVH ( j , kv, i) -UVH ( j , kv , i-1) ) /dx 
3121 continue 

c 

c THE PAPER HAS THE COMPUTATIONAL BD. CD. ZETA=0 
c 

zeta(i,0)=0 
zeta(i, j9)=0 
3120 continue 

312 continue 

cdebug CALL matpr ( ' zeta ' , zeta, i9dim, i9 , j 9) 



c 

c 3.15 in the paper follows 

c 

c 

do 3150 j = l , j 8 
do 3151 i=l,i9 
c 

c both i,j safe, periodic bd.cd in i automatic 
c 

hq(i, j)=(UVH( j ,kh, i)+UVH(j ,kh, i-l)+UVH(j-l,kh, i-1) 
++UVH ( j -1 , kh , i) )/4 
3151 continue 

3150 continue 
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c 

c this still leaves hq undefined at j=0 
c 

c do some extrapolation, maybe zeta=0 will take care of 
c instability 
c 

do 3156 i=l,i9 

hq ( i , 0) =3*hq ( i , 1) -3 *hq ( i , 2 ) +hq ( i , 3 ) 

3156 continue 
c 

c this still leaves hq undefined at j = j 9 
c 

c do some extrapolation, maybe zeta=0 will take care of 
c instability 
c 

do 3157 i=l,i9 

hq ( i , j 9 ) =3*hq ( i , j 9-1) -3*hq ( i, j 9-2 ) +hq ( i , j 9-3) 

3157 continue 

315 continue 

cdebug CALL matpr ( ' hq ' , hq, i9dim, i9 , j 9) 



c 

c 3.11 in the paper follows 
c 

do 311 i=l, i9 
do 311 j=0 , j 9 

q(i,j)=(f(i,j)+ zeta(i, j) ) / hq(i,j) 

311 continue 

cdebug CALL matpr ( ' q ' , q, i9dim, i9 , j 9 ) 

c 

c 3.34 in the paper follows 
c 

do 334 j=0 , j 8 

c the same j loop all over 
do 3340 i=l,i8 

eps(i, j)=(q(i+l, j+l)+ q ( i , j+1) -q ( i , j ) -q ( i+1 , j ) ) /24 
fi(i, j)=(-q(i+l, j + l) + q(i, j + l)+q(i, j) - q(i+l,j))/24 
c 

c these have indices i+1/2, j+1/2 only, like h 
c 

3340 continue 
do 3341 i=2 , i8 

alfa(i,j)=(2*q(i+l,j+l) +q ( i , j+1) + 2*q(i,j)+ q(i+l,j))/24 
beta (i,j)=(q(i,j+l)+ 2*q(i-l, j+1) + q(i-l,j)+ 2*q(i,j))/24 
gama (i,j)=(2*q(i,j+l)+ q(i-l,j+l)+ 2*q(i-l,j)+ q(i,j))/24 
delta (i,j)=(q(i+l,j+l)+2*q(i,j+l)+ q(i, j)+2*q(i+l, j) )/24 

3341 continue 
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c closing the j loop 
334 continue 

cdebug CALL matpr ( ' eps ' , eps , i9dim, i9 , j 9) 

cdebug CALL matpr ( 1 f i ' , f i , i9dim, i9 , j 9) 



cdebug 

cdebug 

cdebug 

cdebug 



CALL matpr ( 'alfa' ,alfa, i9dim, i9 , j 9 ) 
CALL matpr ( ' beta ' , beta, i9dim, i9 , j 9 ) 
CALL matpr ( 'gama ' , gama, i9dim, i9 , j9) 
CALL matpr ( ' delta ' , delta , i9dim , i9 , j 9 ) 



c 

c 3.36-3.37 in the paper follow 
c 

do 336 j=0,j8 
do 336 i=l,i9 

hu(i, j)=(UVH(j ,kh, i-1) +UVH ( j , kh , i ) )/2 

336 continue 
do 337 i=0,i9 
do 337 j=l , j 9 

hv (i, j ) = (UVH ( j-1 , kh, i) + UVH( j , kh, i) )/2 
c 

c at j=0 and j = j 9 , hv may be anything, because v=0 
c 

337 continue 

cdebug CALL matpr ( 'hu' ,hu, i9dim, i9 , j 9 ) 

cdebug CALL matpr ( 'hv' ,hv, i9dim, i9, j 9 ) 



c 3.41 in the paper follows 
c 

do 341 i=0,i8 
do 341 j =0 , j 8 

cay (i, j)=(UVH(j ,ku, i) **2 +UVH( j ,ku, i+1) **2 
_+UVH( j ,kv, i) **2 +UVH ( j + 1 , kv , i ) **2)/4 
341 continue 

cdebug CALL matpr ( 'cay' , cay, i9dim, i9,j9) 



c 3. 3-3. 4 in the paper follow 
c 

do 33 j = 0 , j 8 
do 33 i=l,i9 

ustar(i,j)= hu(i,j)* UVH(j,ku,i) 
33 continue 
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do 34 i=0,i9 
vstar ( i , 0) =0 
c 

c bd.cd v=0 

c 

do 34 j =1 , j 9 

vstar(i,j)= hv(i,j)* UVH(j,kv,i) 

34 continue 

cdebug CALL matpr (' ustar ', ustar , i9dim, i9 , j 9) 

cdebug CALL matpr (' vstar ', vstar , i9dim, i9 , j9 ) 

c 

c 3. 1-3. 2 in the paper follow 
c 

do 32 i=l,i8 
do 32 j=0,j8 
c 

c this is where dhdt values are defined, they are needed for: 
c imin <= i <= imax 

c 0 <= j <= j 8 

c NOT needed for j = j 9 

c 

dhdt ( i / j ) = ( ustar ( i , j ) -ustar ( i+1 , j ) ) /dx 
_+ (vstar ( i, j ) -vstar ( i , j+1) ) /dy 

31 continue 

32 continue 

cdebug CALL matpr (' dhdt dhdt , i9dim, i9 , j 9 ) 

c 

c 3.5 in the paper follows to compute du/dt 

c 

c 

do 35 j=0,j8 
do 35 i=2,i8 
c 

c this is where dudt is defined . it is needed for: 
c imin <= i <= imax 

c 0 <= j <= j 8 

c NOT needed for j = j 9 

c 

c 2.5 in the paper follows, defining capital phi in terms of h,hs 
c 

caphil=g* (UVH( j , kh, i-1) +hs(i-l,j)) 
caphi2=g* (UVH( j ,kh, i) +hs(i,j)) 

dudt(i,j)= alfa(i,j)* vstar (i, j+1) +beta ( i , j ) * vstar (i-1, j+1) 
_+gama(i,j)* vstar(i-l,j) 

_+delta ( i , j ) * vstar ( i , j ) -eps ( i , j ) *ustar ( i+1 , j ) 

_+eps(i-l, j) * ustar(i-l,j) 

_+(cay(i-l, j)+ caphil -cay(i,j)- caphi2)/dx 

35 continue 
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cdebug CALL matpr (' dudt ', dudt , i9dim, i9 , j 9 ) 

c 

c 3.6 in the paper follows to compute dv/dt 
c 

do 36 i=2 , i8-l 
do 36 j=l , j 8 
c 

c this is where dvdt is defined 
c it is needed for 
c imin <= i <= imax 

c 1 <= j <= j 8 

c 

c dvdt NOT needed at j=0 or j=j9, because there v=0 always 
c 

c 2.5 in the paper follows, defining capital phi in terms of h,hs 
c 

caphi3=g* (UVH ( j -1 , kh, i) +hs(i,j-l)) 
caphi4=g* (UVH ( j ,kh, i) +hs(i,j)) 

dvdt(i,j)= -gama (i+1 , j ) * ustar (i+1, j) - delta(i,j)* ustar(i,j) 
alfa (i , j — 1) *ustar (i , j-1) - beta (i+1, j-1) * ustar (i+1, j-1) 

_+f i (i, j-1) *vstar (i,j-l)+fi(i,j)* vstar ( i , j+1) 

_+(caphi3+cay (i, j-1) -caphi4 - cay(i,j))/dy 
36 continue 

cdebug CALL matpr (' dvdt ' , dvdt , i9dim, i9 , j 9 ) 

return 
end 

function hsfun (i , j ,meshx, meshy) 
parameter ( lparm0=10 ) 
parameter (maxx=6000 ,maxy=2 000) 

common/bkkpcom/mOdulO , iam,myslice, ibefore, iafter 
_, iOglobal , i9 , i8 , imin, imax 
common/dtcom/dt , trepo , ttot 
common/physcom/coriof , corioa , coriob , g 
common/UVHOcom/uO , vO , top , width , hO , parmO ( IparmO ) 

x=meshx* (mod ( i0global+i,m0dul0) +0.5) 
y=meshy * ( j +0 . 5 ) 

c x,y for h from C-mesh 

xx=abs (x-0 . 5*maxx) 
if (xx. It. width) then 
hsfun=top* ( l-xx/width) 
else 
hsfun=0 
endif 
c 

c this is the triangular ridge, more generally, 
c hsfun might be computed using x,y and the parmO data 
c 

return 

end 
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subroutine init 



parameter ( lparm0=10) 

c cube parameters 

parameter (nprocs=8 , node9=nprocs-l) 

parameter ( lbuf =10 *nprocs+30+lparm0 , leninit=lbuf *4 ) 
c 4 bytes per float 

parameter ( inityp=914 ) 
c domain constants 

parameter (maxx=6000 ,maxy=2000 , meshy=50 ,meshx=50) 
parameter (j9=maxy/meshy, i9global=maxx/meshx - 1) 
parameter ( i9dim=4+ ( l+i9global) /nprocs) 
c 

c notice that the domain size and mesh define the 
c dimensions of all the arrays 
c 



parameter (ku=l , kv=2 , kh=3 ) 
common/allcom/ UVH(0: j9, 3, 0: i9dim) 

, zeta ( 0 : i9dim, 0 : j 9) , hq(0 : i9dim, 0 : j9) , q (0 : i9dim, 0 : j 9) 

, f ( 0 : i9dim, 0 : j9 ) , alfa (0 : i9dim, 0 : j9) , beta (0 : i9dim, 0 : j9) 

, gama ( 0 : i9dim, 0 : j 9) , delta (0 : i9dim, 0 : j9) 

, eps ( 0 : i9dim, 0 : j 9 ) , f i (0 : i9dim, 0 : j9) 

, cay ( 0 : i9dim, 0 : j9) , ustar (0 : i9dim, 0 : j 9 ) , vs tar ( 0 : i9dim, 0 : j 9 ) 
, dudt ( 0 : i9dim, 0 : j9) , dvdt ( 0 : i9dim, 0 : j 9 ) , dhdt (0 : i9dim, 0 : j 9 ) 

, hs (0 : i9dim, 0 : j9) , hu ( 0 : i9dim, 0 : j 9) , hv ( 0 : i9dim, 0 : j 9) 

common/bkkpcom/mOdulO , iam, my si ice , ibef ore , iafter 
, iOglobal , i9 , i8 , imin, imax 

common/dtcom/dt , trepo, ttot 

common/physcom/coriof , cor ioa , cor iob , g 

common/UVHOcom/uO , vO , top , width , hO , parmO ( IparmO ) 

dimension buf(lbuf) 

call crecv ( inityp, buf , leninit) 
call bokepn (buf) 
l=10*nprocs+l 
dt=buf (1) 

1=1+1 

trepo=buf (1) 

1=1+1 

ttot=buf (1) 

1=1+1 

coriof=buf (1) 

1=1+1 

corioa=buf ( 1) 
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1=1+1 

coriob=buf (1) 

1=1+1 
g=buf (1) 

1=1+1 
uO=buf (1) 

1=1+1 
vO=buf (1) 

1=1+1 
top=buf (1) 

1=1+1 

width=buf (1) 

1=1+1 
hO=buf (1) 

1=1+1 

do 976 j=l,lparmO 
panaO ( j ) =buf (1) 

1=1+1 

976 continue 

c 

c the common allcom is filled with trash, to check 
c glitches in index manipulation 
c 

c but some variables get meaningful values: f ,vstar, . 
c 

do 1 i=0,i9 
do 1 j=0 , j 9 

UVH ( j , ku , i ) =1 . e6+ j+100*i 
zeta(i, j ) =3 . e6+j+100*i 
UVH(j,kh,i) =4 . e6+j+100*i 
hq(i, j ) =5 . e6+j+100*i 
q(i, j )=6.e6+j+l00*i 
f ( i , j ) =coriofun ( i , j , meshx , meshy) 
alfa(i, j)=8 . e6+j+100*i 
beta (i, j ) =9 . e6+j+100*i 
gama(i, j )=-l. e6-j-100*i 
delta (i , j ) =-2 .e6-j-l00*i 
eps (i, j ) =-3 . e6-j -100* i 
fi(i, j)=-4 .e6-j-100*i 
hu(i, j)=-5.e6-j-l00*i 
hv(i, j)=-6.e6-j-l00*i 
cay (i/ j ) =-7 . e6-j-100*i 
ustar(i, j)=-8.e6-j-l00*i 
UVH ( j , kv , i ) =o 
vstar(i, j)=0 
c 

c implementation of WALL bd.cd 
c 

dudt (i, j)=0 
dvdt ( i , j ) =0 
dhdt (i , j ) =0 
1 continue 
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c initial data, defined on PART OF the arrays 
c 



j 8=j 9-1 
do 170 i=0,i9 
do 170 j=0 , j 8 

UVH(j/ku,i) =u0fun(i, j ,meshx, meshy ) 
continue 
do 171 i=0,i9 
do 171 j = 1 , j 8 
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<=> 



implementation of WALL bd.cd 



c 1<= j <=j 8 
c 

UVH(j,kv,i) =v0fun(i, j ,meshx, meshy ) 

171 continue 

do 172 i=0 , i9 

do 172 j=0 , j 9 

hs ( i , j ) =hsf un ( i , j , meshx , meshy) 
c 

c hs is defined everywhere, it is the geography 
c 

172 continue 

do 271 i=0,i9 

do 271 j=0,j8 

UVH ( j , kh, i) =h0-hs (i , j ) 

271 continue 



CALL UVHpr(UVH,'u init ' , ku, i9dim, i9 , j 9) 
CALL UVHpr(UVH,'v init ' , kv , i9dim, i9 , j 9) 
CALL UVHpr(UVH, 'h init ' , kh , i9dim, i9 , j 9 ) 



return 

end 

subroutine matpr (t , a, leadim, i9 ,j9) 
common/seqcom/lmnpr 

common/bkkpcom/mOdulO , iam,myslice, ibefore, iafter 
_, iOglobal , i90 , i8 , imin, imax 
character* (*) t 
dimension a ( 0 : leadim, 0 : j 9 ) 
c 

c comment out the return during debug 
c 

return 

print2000, 10000 *lmnpr , t , leadim, i9 , j 9 , imin, imax, iam 
2000 format ( ilO , 2x, alO, ' leadim, i9 , j9= •, 3i4 , • imin, imax= • , 3i4 ) 

do 1 i=0,i9 

ii=mod(i+i0global ,m0dul0) 

if ( i . ge . imin. and. i . le . imax) then 

iamy=iam 

else 
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iamy=iam+10 

endif 

do 1 j=0 , j 9 

print 1000 , ii+100* ( j+100*lxnnpr) ,t,j,ii,a(i,j) , iamy 
1 continue 

1000 format(il0,2x,al0,2x,2i4, f20.7, i3) 

lmnpr=lmnpr+l 
return 
end 

subroutine report (n, nrep, nmax) 
c 

c n is the number of steps 



parameter ( lparm0=10) 

c cube parameters 

parameter (nprocs=8 , node9=nprocs-l) 
parameter (nodes=-l , idhost=2 ,nodepid=3 ) 
c domain constants 

parameter (maxx=6000,maxy=2 000 ,meshy=50 / meshx=50) 
parameter (j9=maxy/meshy, i9global=maxx/meshx - 1) 
parameter (i9dim=4+ ( l+i9global) /nprocs) 
c 

c notice that the domain size and mesh define the 
c dimensions of all the arrays 
c 

c 

c The array UVH contains the data u,v,h, in a format that allows 
c fast message passing, 
c 

parameter (len=12* (i9dim+l) * ( j9+l) ) 
parameter (ku=l , kv=2 , kh=3 ) 
common/allcom/ UVH (0 : j9 , 3 , 0 : i9dim) 
zeta (0 : i9dim, 0: j 9 ) , hq(0: i9dim, 0 : j 9 ) , q(0: i9dim, 0 : j 9 ) 

, f (0: i9dim, 0: j 9 ) , alf a (0 : i9dim, 0 : j 9) , beta ( 0 : i9dim, 0 : j 9 ) 

_,gama(0: i9dim f 0: j9) ,delta(0: i9dim,0: j9) 
eps ( 0 : i9dim, 0: j 9 ) , f i (0: i9dim, 0 : j 9 ) 

, cay (0 : i9dim, 0: j9) ,ustar (0: i9dim, 0: j 9 ) , vstar (0: i9dim, 0: j9) 

dudt ( 0 : i9dim, 0 : j9) , dvdt (0 : i9dim, 0 : j 9 ) , dhdt (0: i9dim, 0: j 9) 
hs (0 : i9dim, 0: j 9 ) ,hu(0: i9dim, 0: j 9 ) ,hv (0: i9dim, 0: j9) 

common/bkkpcom/mOdulO, iam,myslice, ibefore, iafter 
iOglobal , i9 , i8 , imin, imax 

common/ dtcom/dt , trepo , ttot 

common/ physcom/ cor iof , cor ioa , cor iob , g 

common/UVHOcom/uO , vO , top , width , hO , parmO ( IparmO) 
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if (rood (n, nrep) .ne. 0) return 
itype=n*100+iam 
452 continue 

myh=myhost ( ) 

call csend ( itype, UVH, len,myh, idhost ) 

if (n.ge.nmax) stop 5144 

return 

end 

function u0fun(i, j ,meshx, meshy) 
parameter ( lparm0=10 ) 

common/bkkpcom/mOdulO , iam,myslice, ibefore, iafter 
_, iOglobal, i9, i8 , imin, imax 
common/dtcom/dt , trepo , ttot 
common/physcom/cor iof , cor ioa , cor iob , g 
common/UVHOcom/uO , vO , top , width , hO , parmO ( IparmO ) 

x=meshx*mod ( iOglobal+i , mOdulO) 
y=meshy* ( j+0 . 5) 

c x,y for u from C-mesh 

u0fun=u0 
c 

c for the simplest case, u is constant, but in general 
c it might be computed using x,y and the parmO data 
c 

return 

end 

function v0fun(i,j ,meshx, meshy) 
parameter ( lparm0=10 ) 

common/bkkpcom/mOdulO , iam,myslice, ibefore, iafter 
_, iOglobal, i9, i8, imin, imax 
common/dtcom/dt , trepo , ttot 
common/physcom/coriof , corioa , coriob, g 
common/UVHOcom/uO , vO , top , width , hO , parmO ( IparmO ) 

x=meshx* (mod ( i0global+i,m0dul0) +0 . 5) 
y=meshy* ( j ) 

c x,y for v from C-mesh 

v0fun=v0 
c 

c for the simplest case, v is constant, but in general 
c it might be computed using x,y and the parmO data 
c 

return 

end 
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4. Makefile 



# This file is used to compile and link the host.f, node.f 

# 

# The command "make all" causes compilation and linking. 



all : host node 



host.o: host.f 

node.o: node.f 

host: host.o 

f 77 -o host host.o -host 



node : node . o 

f77 -o node node.o -node 
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5. Input File 



dt 

60 

ttotal 

79800 

top 

2 

width 

1000 

uO 

20.e-3 

end 
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